Heat map to show the relationship between health problems diagnosed by MD vs chronic substance use
HELPdata <- read_csv("HELPdata.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## days_since_bl = col_logical(),
## days_since_prev = col_logical(),
## prev_time = col_logical(),
## a14g_t = col_character(),
## a17i_t = col_character(),
## c3f_t = col_character(),
## c3k_m = col_logical(),
## e18a = col_logical(),
## e18b = col_logical(),
## e18c = col_logical(),
## e18d = col_logical(),
## e18f = col_logical(),
## e18g = col_logical(),
## e18h = col_logical(),
## e18i = col_logical(),
## e18j = col_logical(),
## e18k = col_logical(),
## e18l = col_logical(),
## e18m = col_logical(),
## t1 = col_logical()
## # ... with 132 more columns
## )
## i Use `spec()` for the full column specifications.
names(HELPdata)
## [1] "id" "time" "num_intervals" "int_time1"
## [5] "days_since_bl" "int_time2" "days_since_prev" "prev_time"
## [9] "dead" "a1" "a9" "a10"
## [13] "a11a" "a11b" "a11c" "a11d"
## [17] "a11e" "a12b" "a13" "a14a"
## [21] "a14b" "a14c" "a14d" "a14e"
## [25] "a14f" "a14g" "a14g_t" "a15a"
## [29] "a15b" "a15c" "a16a" "a16b"
## [33] "a16c" "a17a" "a17b" "a17c"
## [37] "a17d" "a17e" "a17f" "a17g"
## [41] "a17h" "a17i" "a17i_t" "a18"
## [45] "b1" "b2" "b3a" "b3b"
## [49] "b3c" "b3d" "b3e" "b3f"
## [53] "b3g" "b3h" "b3i" "b3j"
## [57] "b4a" "b4b" "b4c" "b4d"
## [61] "b5a" "b5b" "b5c" "b6"
## [65] "b7" "b8" "b9a" "b9b"
## [69] "b9c" "b9d" "b9e" "b9f"
## [73] "b9g" "b9h" "b9i" "b10"
## [77] "b11a" "b11b" "b11c" "b11d"
## [81] "c1a" "c1b" "c1c" "c1d"
## [85] "c1e" "c1f" "c1g" "c1h"
## [89] "c1i" "c1j" "c1k" "c1l"
## [93] "c1m" "c2a1" "c2a2" "c2b1"
## [97] "c2b2" "c2c1" "c2c2" "c2d1"
## [101] "c2d2" "c2e1" "c2e2" "c2f1"
## [105] "c2f2" "c2g1" "c2g2" "c2h1"
## [109] "c2h2" "c2i1" "c2i2" "c2j1"
## [113] "c2j2" "c2k1" "c2k2" "c2l1"
## [117] "c2l2" "c2m1" "c2m2" "c2n1"
## [121] "c2n2" "c2o1" "c2o2" "c2p1"
## [125] "c2p2" "c2q1" "c2q2" "c2r1"
## [129] "c2r2" "c2s1" "c2s2" "c2t1"
## [133] "c2t2" "c2u1" "c2u2" "c2v1"
## [137] "c2v2" "c2w1" "c2w2" "c3a1"
## [141] "c3a2" "c3a3" "c3b1" "c3b2"
## [145] "c3b3" "c3c1" "c3c2" "c3c3"
## [149] "c3d" "c3e" "c3f1" "c3f2"
## [153] "c3f3" "c3f_t" "c3g1" "c3g2"
## [157] "c3g3" "c3g4" "c3h1" "c3h2"
## [161] "c3h3" "c3i" "c3j" "c3k"
## [165] "c3k_m" "d1" "d2" "d3"
## [169] "d4" "d5" "e2a" "e2b"
## [173] "e2c" "e3a" "e3b" "e3c"
## [177] "e4a" "e4b" "e4c" "e5a"
## [181] "e5b" "e6" "e7a" "e7b"
## [185] "e8a1" "e8a2" "e8a3" "e8a4"
## [189] "e9a" "e9b" "e10a" "e10b1"
## [193] "e10b2" "e10c19" "e11a" "e11b"
## [197] "e11c" "e12a" "e12b" "e13"
## [201] "e14a" "e14b" "e14c" "e14d"
## [205] "e14e" "e14f" "e14g" "e15a"
## [209] "e15b" "e15c1" "e15c2" "e15c3"
## [213] "e15c4" "e15c5" "e15c6" "e15c7"
## [217] "e15c8" "e15c9" "e15c10" "e15c11"
## [221] "e15c12" "e16a1" "e16a2" "e16a3"
## [225] "e16a4" "e16a5" "e16a6" "e16a7"
## [229] "e16a8" "e16a9" "e16a10" "e16a11"
## [233] "e16a12" "e16a13" "e18a" "e18b"
## [237] "e18c" "e18d" "e18f" "e18g"
## [241] "e18h" "e18i" "e18j" "e18k"
## [245] "e18l" "e18m" "f1a" "f1b"
## [249] "f1c" "f1d" "f1e" "f1f"
## [253] "f1g" "f1h" "f1i" "f1j"
## [257] "f1k" "f1l" "f1m" "f1n"
## [261] "f1o" "f1p" "f1q" "f1r"
## [265] "f1s" "f1t" "g1a" "g1a_30"
## [269] "g1b" "g1b_30" "g1c" "g1c_30"
## [273] "g1d" "g1d_30" "h1_30" "h1_lt"
## [277] "h1_rt" "h2_30" "h2_lt" "h2_rt"
## [281] "h3_30" "h3_lt" "h3_rt" "h4_30"
## [285] "h4_lt" "h4_rt" "h5_30" "h5_lt"
## [289] "h5_rt" "h6_30" "h6_lt" "h6_rt"
## [293] "h7_30" "h7_lt" "h7_rt" "h8_30"
## [297] "h8_lt" "h8_rt" "h9_30" "h9_lt"
## [301] "h9_rt" "h10_30" "h10_lt" "h10_rt"
## [305] "h11_30" "h11_lt" "h11_rt" "h12_30"
## [309] "h12_lt" "h12_rt" "h13_30" "h13_lt"
## [313] "h13_rt" "h14" "h15a" "h15b"
## [317] "h16a" "h16b" "h17a" "h17b"
## [321] "h18a" "h18b" "h19a" "h19b"
## [325] "i1" "i2" "i3" "i4"
## [329] "i5" "i6a" "i6b" "i7a"
## [333] "i7b" "i8" "j1" "j2"
## [337] "j3" "j4" "j5a" "j5b"
## [341] "j6" "j7" "j8" "j9"
## [345] "j10a" "j10b" "k1" "k2"
## [349] "k3" "l1" "l2" "l3"
## [353] "l4" "l5" "l6" "l7"
## [357] "l8" "l9" "l10" "l11"
## [361] "l12" "l13" "l14" "l15"
## [365] "l16" "l17" "l18" "l19"
## [369] "l20" "l21" "l22" "l23"
## [373] "l24" "l25" "m1" "m2"
## [377] "m3" "m4" "m5" "m6"
## [381] "m7" "m8" "m9" "m10"
## [385] "m11" "m12" "m13" "m14"
## [389] "m15" "m16" "m17" "m18"
## [393] "m19" "m20" "m21" "m22"
## [397] "m23" "m24" "m25" "m26"
## [401] "m27" "m28" "m29" "m30"
## [405] "m31" "m32" "m33" "m34"
## [409] "m35" "m36" "m37" "m38"
## [413] "m39" "m40" "m41" "m42"
## [417] "m43" "m44" "m45" "m46"
## [421] "m47" "m48" "m49" "m50"
## [425] "n1a" "n1b" "n1c" "n1d"
## [429] "n1e" "n1f" "n1g" "n1h"
## [433] "n1i" "n1j" "n1k" "n1l"
## [437] "n1m" "n1n" "n2a" "n2b"
## [441] "n2c" "n2d" "n2e" "n2f"
## [445] "n2g" "n2h" "n2i" "n2j"
## [449] "n2k" "n2l" "n2m" "n2n"
## [453] "o1a" "o1b" "o1c" "o1d"
## [457] "o2" "p1a" "p1b" "p1c"
## [461] "p2a" "p2b" "p2c" "p3"
## [465] "p4" "p5a" "p5b" "p5c"
## [469] "p6a" "p6b" "p6c" "p7"
## [473] "p8" "q1a" "q1b" "q2"
## [477] "q3" "q4" "q5" "q6"
## [481] "q7" "q8" "q9" "q10"
## [485] "q11" "q12" "q13" "q14"
## [489] "q15" "q16" "q17" "q18"
## [493] "q19" "q20" "r1a" "r1b"
## [497] "r1c" "r1d" "r1e" "r1f"
## [501] "r1g" "r1h" "r1i" "r1j"
## [505] "r1k" "r1l" "r1m" "r1n"
## [509] "r1o" "r1p" "r1q" "r1r"
## [513] "r1s" "s1a" "s1b" "s1c"
## [517] "s1d" "s1e" "s1f" "t1"
## [521] "t1b" "t1c" "t2" "t2b"
## [525] "t2c" "t3" "t3b" "t3c"
## [529] "u1" "u2a" "u2b" "u2c"
## [533] "u2d" "u2e" "u2f" "u2g"
## [537] "u2h" "u2i" "u2j" "u2k"
## [541] "u2l" "u2m" "u2n" "u2o"
## [545] "u2p" "u2q" "u2q_t" "u2r"
## [549] "u3a" "u3b" "u4" "u5"
## [553] "u6a" "u6b" "pcp_id" "u7a"
## [557] "u7a_t" "u8a" "u8b" "u8c"
## [561] "u8d" "u8e" "u8f" "u10a"
## [565] "u10b" "u10c" "u11" "u12"
## [569] "u13" "u14" "u15a" "u15b"
## [573] "u16a" "u16b" "u17" "u18a"
## [577] "u18b" "u19" "u20" "u21a"
## [581] "u21b" "u22a" "u22b" "u22c"
## [585] "u22d" "u22e" "u23" "u24a"
## [589] "u24b" "u24c" "u24d" "u24e"
## [593] "u25a" "u25b" "u25c" "u25d"
## [597] "u25e" "u25f" "u25g" "u25h"
## [601] "u25i" "u26a" "u26b" "u26c"
## [605] "u26d" "u26e" "u26f" "u26g"
## [609] "u26h" "u26i" "u27a" "u27b"
## [613] "u27c" "u27d" "u27e" "u27f"
## [617] "u27g" "u28" "u29a" "u29b"
## [621] "u29c" "u29d" "u30" "u31"
## [625] "u32a" "u32b" "u32c" "u32d"
## [629] "u32d_t" "u33" "u34" "u35a"
## [633] "u35b" "u35c" "u35d" "u35e"
## [637] "u35f" "u36" "u37" "u38"
## [641] "u39" "v1" "v2" "z1"
## [645] "z2" "age" "realm" "e16a_rt"
## [649] "e16a_ib" "e16a_tm" "e16a_dd" "group"
## [653] "mmsec" "prim_sub" "secd_sub" "alcohol"
## [657] "coc_her" "realm2" "realm3" "race"
## [661] "race2" "birthplc" "primlang" "md_lang"
## [665] "hs_grad" "mar_stat" "a12b_rec" "unemploy"
## [669] "alone6m" "homeless" "jail_mos" "jail_5yr"
## [673] "gov_supp" "a18_rec1" "a18_rec2" "std_ever"
## [677] "std_6m" "chr_sum" "chr_ever" "epi_sum"
## [681] "epi_6m" "epi_6m2b" "ser_inj" "d3_rec"
## [685] "d4_rec" "d5_rec" "any_ins" "frml_sat"
## [689] "e10b1_r" "e10b2_r" "alt_trt" "any_util"
## [693] "num_barr" "g1b_rec" "g1d_rec" "primsub2"
## [697] "alcq_30" "h2_prb" "h3_prb" "h4_prb"
## [701] "h5_prb" "h6_prb" "h7_prb" "h8_prb"
## [705] "h9_prb" "h10_prb" "h11_prb" "h12_prb"
## [709] "polysub" "smoker" "o1b_rec" "o1c_rec"
## [713] "o1d_rec" "o2_rec" "phyabuse" "sexabuse"
## [717] "phsxabus" "abuse2" "abuse3" "curphyab"
## [721] "cursexab" "curphysexab" "famabuse" "strabuse"
## [725] "abuse" "rawpf" "pf" "rawrp"
## [729] "rp" "rawbp" "bp" "rawgh"
## [733] "gh" "rawvt" "vt" "rawsf"
## [737] "sf" "rawre" "re" "rawmh"
## [741] "mh" "ht" "pcs" "mcs"
## [745] "ces_d" "cesd_cut" "c_ms" "c_au"
## [749] "c_du" "cuad_c" "cuad_h" "raw_re"
## [753] "dec_re" "raw_am" "dec_am" "raw_ts"
## [757] "dec_ts" "raw_ads" "phys" "phys2"
## [761] "inter" "intra" "impul" "impul2"
## [765] "sr" "cntrl" "indtot" "indtot2"
## [769] "pss_fr" "pss_fa" "drugrisk" "sexrisk"
## [773] "totalrab" "rabscale" "chr_6m" "rct_link"
## [777] "reg_md" "any_vis" "any_vis_cumul" "pc_rec"
## [781] "pc_rec7" "satreat" "drinkstatus" "daysdrink"
## [785] "anysubstatus" "daysanysub" "linkstatus" "dayslink"
ggplot(data=HELPdata) + geom_bar(mapping=aes(x=age, fill=as.factor(smoker)))
as.data.frame(table(HELPdata$c1a, HELPdata$h2_prb))$Freq[4]
## [1] 29
adjacent <- matrix(c(29, 15, 4, 10, 9, 15, 21, 8, 23, 10, 1,
55, 20, 6, 8, 5, 15, 47, 7, 39, 15, 3,
11, 6, 3, 3, 3, 3, 8, 0, 5, 3, 2,
3, 3, 2, 2, 0, 2, 2, 0, 1, 1, 1,
7, 5, 2, 2, 1, 3, 5, 0, 3, 2, 1,
53, 26, 5, 7, 3, 16, 37, 4, 37, 12, 2,
30, 25, 6, 11, 6, 17, 27, 8, 25, 11, 1,
1, 2, 0, 1, 0, 0, 1, 0, 1, 0, 0,
24, 7, 1, 4, 6, 6, 19, 6, 16, 8, 0,
10, 3, 2, 3, 4, 4, 7, 2, 7, 4, 0,
4, 2, 0, 0, 1, 1, 3, 2, 3, 2, 0,
6, 3, 1, 0, 0, 1, 7, 0, 3, 1, 0,
2, 3, 2, 1, 1, 2, 3, 1, 2, 2, 2), nrow=13, ncol=11, byrow = TRUE)
adjacent
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 29 15 4 10 9 15 21 8 23 10 1
## [2,] 55 20 6 8 5 15 47 7 39 15 3
## [3,] 11 6 3 3 3 3 8 0 5 3 2
## [4,] 3 3 2 2 0 2 2 0 1 1 1
## [5,] 7 5 2 2 1 3 5 0 3 2 1
## [6,] 53 26 5 7 3 16 37 4 37 12 2
## [7,] 30 25 6 11 6 17 27 8 25 11 1
## [8,] 1 2 0 1 0 0 1 0 1 0 0
## [9,] 24 7 1 4 6 6 19 6 16 8 0
## [10,] 10 3 2 3 4 4 7 2 7 4 0
## [11,] 4 2 0 0 1 1 3 2 3 2 0
## [12,] 6 3 1 0 0 1 7 0 3 1 0
## [13,] 2 3 2 1 1 2 3 1 2 2 2
rownames(adjacent) <- c("seizure", "asthma", "heart attack", "heart failure", "other heart dis", "Hypertension", "chronic liver disease", "kidney failure", "chronic osteoarthritis", "peripheral neuropathy", "cancer", "diabetes", "stroke")
colnames(adjacent) <- c("alcohol", "heroin", "methadone", "other opiates", "barbiturates", "sedatives", "cocaine", "amphetamines", "marijuana", "hallucinogens", "inhalants")
fig <- plot_ly(
x = c("alcohol", "heroin", "methadone", "other opiates", "barbiturates", "sedatives", "cocaine", "amphetamines", "marijuana", "hallucinogens", "inhalants"), y = c("seizure", "asthma", "heart attack", "heart failure", "other heart dis", "Hypertension", "chronic liver disease", "kidney failure", "chronic osteoarthritis", "peripheral neuropathy", "cancer", "diabetes", "stroke"),
z = adjacent, type = "heatmap"
)
fig <- fig %>% layout(
title = "Relationship between substance use and MD diagnosis",
xaxis = list(title = "Substance use"),
yaxis = list(title = "MD Diagnosis"))
fig
Pie chart to show how people rate their own health in the whole dataset.
health_rate <- as.data.frame(table(HELPdata$b11d))
names(health_rate) <- c("rating", "count")
health_rate %>% ggplot(aes(x="", y=count, fill=rating)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)
This visualization checks the hypothesis that a higher pain score corresponds to a lower self rating of health. The lower score means the subject is in greater pain. We see that as pain score increases, the people who rated their health to be excellent increases, which makes sense.
self_health <- HELPdata %>% select(bp, b11d)
self_health <- self_health %>% mutate(pain_score = case_when(bp <= 25 ~ "0-25",
bp <= 50 ~ "26-50",
bp <= 75 ~ "51-75",
bp <= 100 ~ "76-100"))
self_health <- self_health %>% mutate(good_health = case_when(b11d == 1 ~ "Definitely true",
b11d == 2 ~ "Mostly true",
b11d == 3 ~ "Don't know",
b11d == 4 ~ "Mostly false",
b11d == 5 ~ "Definitely false"))
self_health_pct <- self_health %>% group_by(pain_score, good_health) %>%
summarize(Total=n()) %>%
group_by(pain_score) %>%
mutate(pct=Total/sum(Total))
## `summarise()` has grouped output by 'pain_score'. You can override using the `.groups` argument.
self_health_pct %>%
ggplot() + geom_bar(aes(x=pain_score, y=pct, fill=good_health), stat="identity") +
labs(fill = "My health is excellent")
Checking the hypothesis that the people of older age will more likely believe that their health will get worse. This is mostly true as the percentage of people who believe their health will get worse increases as age increases.
expected_health <- HELPdata %>% select(age, b11c, bp)
expected_health <- expected_health %>% mutate(age_group = case_when(age <= 20 ~ "0-20s",
age <= 30 ~ "30s",
age <= 40 ~ "40s",
age <= 50 ~ "50s",
age <= 60 ~ "60+"))
expected_health <- expected_health %>% mutate(expected_hel = case_when(b11c == 1 ~ "Definitely true",
b11c == 2 ~ "Mostly true",
b11c == 3 ~ "Don't know",
b11c == 4 ~ "Mostly false",
b11c == 5 ~ "Definitely false"))
expected_health <- expected_health %>% mutate(pain_score = case_when(bp <= 25 ~ "0-25",
bp <= 50 ~ "26-50",
bp <= 75 ~ "51-75",
bp <= 100 ~ "76-100"))
health_pct <- expected_health %>% group_by(age_group, expected_hel) %>%
summarize(Total=n()) %>%
group_by(age_group) %>%
mutate(pct=Total/sum(Total))
## `summarise()` has grouped output by 'age_group'. You can override using the `.groups` argument.
health_pct %>%
ggplot() + geom_bar(aes(x=age_group, y=pct, fill=expected_hel), stat="identity") +
labs(fill = "I expect my health to get worse")
Experimenting with more SF-36 scores, we check whether the general health perceptions scores correlated with the pain scores. The higher general health scores should mean higher pain scores since the subjects are under less bodily pain. This seems to be the case, meaning the SF-36 scores relate pretty well to one and other.
HELPdata %>%
ggplot() + geom_bar(aes(x=a9, fill=as.factor(d5_rec)))
## Warning: Removed 3 rows containing non-finite values (stat_count).
HELPdata <- HELPdata %>% mutate(pain_score = case_when(bp <= 25 ~ "0-25",
bp <= 50 ~ "26-50",
bp <= 75 ~ "51-75",
bp <= 100 ~ "76-100"))
HELPdata %>% ggplot() +
geom_jitter(aes(x=age, y=gh, color=pain_score)) +
labs(x="age", y="SF-36 General Health Perceptions score", color="SF-36 Pain score")
Here we investigate the relationship between mental health and general health. This visualisation is an effort to answer the question, do individuals incorporate their mental health to how they rate their mental health. It seems that the majority of people who think their general health is bad also are suffering from poor mental health, which means that mental health is also considered in overall general health.
depres <- HELPdata %>% select(s1a, s1b, s1c, f1f)
depres <- depres %>% mutate(depressed = case_when(f1f == 0 ~ 0,
TRUE ~ 1))
mental <- matrix(table(HELPdata$f1f, HELPdata$b11d), ncol=5, nrow=4)
fig1 <- plot_ly(x = c("Definitely true", "Mostly true", "Don't know", "Mostly false", "Definitely false"), y = c("Rarely/never", "Some of the time", "Occas/moderately", "Most of the time"),
z = mental, type = "heatmap")
fig1 <- fig1 %>% layout(
title = "Relationship Personal mental health perception and general health perception",
xaxis = list(title = "My health is excellent"),
yaxis = list(title = "I felt depressed"))
fig1
Now we investigate education as education may affect the individuals perception of health care, their knowledge of the health care system and their ability to self diagnose themselves. We plot a quick histogram to see whether years of education correspond with the high school variable and it does. 12 years of formal education is when high school finishes, and this is the case here.
Next we see if high school graduates have recently used a health care service, and whether there is a difference between groups which are high school graduates and not. There doesn’t seem to be any differences between the two groups.
So we also check whether the high school graduates groups differ in their thought as to whether medical treatment is important. Again, there is no difference between the two groups.
HELPdata %>%
ggplot() + geom_bar(aes(x=a9, fill=as.factor(hs_grad)))
## Warning: Removed 3 rows containing non-finite values (stat_count).
table(HELPdata$hs_grad, HELPdata$any_util)
##
## 0 1
## 0 22 83
## 1 45 193
edu_health <- HELPdata %>% select(hs_grad, any_util)
edu_health <- na.omit(edu_health)
edu_health %>%
ggplot() + geom_bar(aes(x=hs_grad, fill=as.factor(any_util)))
edu_pct <- edu_health %>% group_by(hs_grad, any_util) %>%
summarize(Total=n()) %>%
group_by(hs_grad) %>%
mutate(pct=Total/sum(Total))
## `summarise()` has grouped output by 'hs_grad'. You can override using the `.groups` argument.
edu_pct %>%
ggplot() + geom_bar(aes(x=hs_grad, y=pct, fill=as.factor(any_util)), stat="identity") +
labs(fill = "Any recent health utilization")
HELPdata %>%
ggplot() + geom_bar(aes(x=hs_grad, fill=as.factor(d5_rec)))
## Warning: Removed 3 rows containing non-finite values (stat_count).
Survival Analysis
HELPdata_survfit <- survfit(Surv(dayslink, linkstatus) ~ a18, data=HELPdata)
ggsurvplot(HELPdata_survfit, conf.type = “TRUE”) + ggtitle(“KM-curve”)
coxph(Surv(dayslink, linkstatus) ~ group, data=HELPdata) %>% tidy()